import pandas as pd
df=pd.read_csv('C:\\Users\\saiph\\Desktop\\sem2\\ait580\\project\\EV.csv')
df
| VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2C4RC1N71H | Kitsap | Bremerton | WA | 98311.0 | 2017 | CHRYSLER | PACIFICA | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 33 | 0 | 23.0 | 349437882 | POINT (-122.6466274 47.6341188) | PUGET SOUND ENERGY INC | 5.303509e+10 |
| 1 | 2C4RC1N7XL | Stevens | Colville | WA | 99114.0 | 2020 | CHRYSLER | PACIFICA | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 32 | 0 | 7.0 | 154690532 | POINT (-117.90431 48.547075) | AVISTA CORP | 5.306595e+10 |
| 2 | KNDC3DLCXN | Yakima | Yakima | WA | 98908.0 | 2022 | KIA | EV6 | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | 0 | 0 | 14.0 | 219969144 | POINT (-120.6027202 46.5965625) | PACIFICORP | 5.307700e+10 |
| 3 | 5YJ3E1EA0J | Kitsap | Bainbridge Island | WA | 98110.0 | 2018 | TESLA | MODEL 3 | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 215 | 0 | 23.0 | 476786887 | POINT (-122.5235781 47.6293323) | PUGET SOUND ENERGY INC | 5.303509e+10 |
| 4 | 1N4AZ1CP7J | Thurston | Tumwater | WA | 98501.0 | 2018 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 151 | 0 | 35.0 | 201185253 | POINT (-122.89692 47.043535) | PUGET SOUND ENERGY INC | 5.306701e+10 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 159462 | KM8JBDA2XP | Skamania | Underwood | WA | 98651.0 | 2023 | HYUNDAI | TUCSON | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 33 | 0 | 14.0 | 235949514 | POINT (-121.5312858 45.7348285) | BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF S... | 5.305995e+10 |
| 159463 | 1G1FZ6S02M | Skagit | Bow | WA | 98232.0 | 2021 | CHEVROLET | BOLT EV | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | 0 | 0 | 40.0 | 148544168 | POINT (-122.440636 48.5613885) | PUGET SOUND ENERGY INC | 5.305795e+10 |
| 159464 | YV4H60CX2P | King | Sammamish | WA | 98029.0 | 2023 | VOLVO | XC90 | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 32 | 0 | 5.0 | 240200754 | POINT (-121.9993659 47.5484866) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) | 5.303303e+10 |
| 159465 | 5YJ3E1EA7K | Whatcom | Bellingham | WA | 98225.0 | 2019 | TESLA | MODEL 3 | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 220 | 0 | 40.0 | 156680590 | POINT (-122.486115 48.761615) | PUGET SOUND ENERGY INC||PUD NO 1 OF WHATCOM CO... | 5.307300e+10 |
| 159466 | 7SAYGDEF6N | Island | Camano Island | WA | 98282.0 | 2022 | TESLA | MODEL Y | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | 0 | 0 | 10.0 | 208285619 | POINT (-122.5310901 48.2192797) | BONNEVILLE POWER ADMINISTRATION||PUD 1 OF SNOH... | 5.302997e+10 |
159467 rows × 17 columns
df.head(5)
| VIN (1-10) | County | City | State | Postal Code | Model Year | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | Vehicle Location | Electric Utility | 2020 Census Tract | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2C4RC1N71H | Kitsap | Bremerton | WA | 98311.0 | 2017 | CHRYSLER | PACIFICA | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 33 | 0 | 23.0 | 349437882 | POINT (-122.6466274 47.6341188) | PUGET SOUND ENERGY INC | 5.303509e+10 |
| 1 | 2C4RC1N7XL | Stevens | Colville | WA | 99114.0 | 2020 | CHRYSLER | PACIFICA | Plug-in Hybrid Electric Vehicle (PHEV) | Clean Alternative Fuel Vehicle Eligible | 32 | 0 | 7.0 | 154690532 | POINT (-117.90431 48.547075) | AVISTA CORP | 5.306595e+10 |
| 2 | KNDC3DLCXN | Yakima | Yakima | WA | 98908.0 | 2022 | KIA | EV6 | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | 0 | 0 | 14.0 | 219969144 | POINT (-120.6027202 46.5965625) | PACIFICORP | 5.307700e+10 |
| 3 | 5YJ3E1EA0J | Kitsap | Bainbridge Island | WA | 98110.0 | 2018 | TESLA | MODEL 3 | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 215 | 0 | 23.0 | 476786887 | POINT (-122.5235781 47.6293323) | PUGET SOUND ENERGY INC | 5.303509e+10 |
| 4 | 1N4AZ1CP7J | Thurston | Tumwater | WA | 98501.0 | 2018 | NISSAN | LEAF | Battery Electric Vehicle (BEV) | Clean Alternative Fuel Vehicle Eligible | 151 | 0 | 35.0 | 201185253 | POINT (-122.89692 47.043535) | PUGET SOUND ENERGY INC | 5.306701e+10 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 159467 entries, 0 to 159466 Data columns (total 17 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 VIN (1-10) 159467 non-null object 1 County 159463 non-null object 2 City 159463 non-null object 3 State 159467 non-null object 4 Postal Code 159463 non-null float64 5 Model Year 159467 non-null int64 6 Make 159467 non-null object 7 Model 159467 non-null object 8 Electric Vehicle Type 159467 non-null object 9 Clean Alternative Fuel Vehicle (CAFV) Eligibility 159467 non-null object 10 Electric Range 159467 non-null int64 11 Base MSRP 159467 non-null int64 12 Legislative District 159106 non-null float64 13 DOL Vehicle ID 159467 non-null int64 14 Vehicle Location 159458 non-null object 15 Electric Utility 159463 non-null object 16 2020 Census Tract 159463 non-null float64 dtypes: float64(3), int64(4), object(10) memory usage: 20.7+ MB
df.shape
(159467, 17)
category=[i for i in df.columns if df[i].dtype=='object']
numerical=[i for i in df.columns if df[i].dtype!='object']
print(category)
['VIN (1-10)', 'County', 'City', 'State', 'Make', 'Model', 'Electric Vehicle Type', 'Clean Alternative Fuel Vehicle (CAFV) Eligibility', 'Vehicle Location', 'Electric Utility']
print(numerical)
['Postal Code', 'Model Year', 'Electric Range', 'Base MSRP', 'Legislative District', 'DOL Vehicle ID', '2020 Census Tract']
univarient analysis
df[numerical].describe()
| Postal Code | Model Year | Electric Range | Base MSRP | Legislative District | DOL Vehicle ID | 2020 Census Tract | |
|---|---|---|---|---|---|---|---|
| count | 159463.000000 | 159467.000000 | 159467.000000 | 159467.00000 | 159106.000000 | 1.594670e+05 | 1.594630e+05 |
| mean | 98170.373635 | 2020.192510 | 64.283319 | 1227.63716 | 29.261675 | 2.140242e+08 | 5.297287e+10 |
| std | 2453.354932 | 3.010564 | 94.634277 | 8930.03468 | 14.843878 | 7.959275e+07 | 1.621526e+09 |
| min | 1730.000000 | 1997.000000 | 0.000000 | 0.00000 | 1.000000 | 4.385000e+03 | 1.081042e+09 |
| 25% | 98052.000000 | 2018.000000 | 0.000000 | 0.00000 | 18.000000 | 1.731016e+08 | 5.303301e+10 |
| 50% | 98122.000000 | 2021.000000 | 14.000000 | 0.00000 | 33.000000 | 2.198450e+08 | 5.303303e+10 |
| 75% | 98370.000000 | 2023.000000 | 84.000000 | 0.00000 | 43.000000 | 2.448363e+08 | 5.305307e+10 |
| max | 99577.000000 | 2024.000000 | 337.000000 | 845000.00000 | 49.000000 | 4.792548e+08 | 5.603300e+10 |
df[category].describe()
| VIN (1-10) | County | City | State | Make | Model | Electric Vehicle Type | Clean Alternative Fuel Vehicle (CAFV) Eligibility | Vehicle Location | Electric Utility | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 159467 | 159467 | 159463 | 159467 | 159467 | 159467 | 159467 | 159467 | 159458 | 159463 |
| unique | 9855 | 29 | 696 | 45 | 29 | 30 | 2 | 3 | 835 | 75 |
| top | 7SAYGDEE6P | King | Seattle | WA | TESLA | MODEL Y | Battery Electric Vehicle (BEV) | Eligibility unknown as battery range has not b... | POINT (-122.12302 47.67668) | PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) |
| freq | 975 | 83413 | 26932 | 159106 | 72445 | 30841 | 124153 | 77195 | 4095 | 58884 |
# Multivariate Analysis
# Correlation Between Variables
correlation_matrix = df[numerical].corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()
# Regression Analysis
sns.lmplot(x='2020 Census Tract', y='Postal Code', data=df, height=6, aspect=1.5)
plt.title('Regression Analysis: Income Level vs EV Ownership')
plt.show()
C:\Users\saiph\anaconda3\Lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
df.State.value_counts()
State WA 159106 CA 95 VA 37 MD 35 TX 22 NC 14 IL 13 CO 13 FL 9 HI 9 OR 9 AZ 9 NJ 8 CT 7 SC 7 NY 7 GA 7 NV 6 LA 6 MO 4 DC 3 MA 3 KY 3 PA 3 NE 3 AL 3 IN 2 BC 2 KS 2 ID 2 UT 2 OH 2 AR 2 AP 1 AE 1 OK 1 IA 1 DE 1 MI 1 WY 1 AK 1 MT 1 NH 1 MS 1 MN 1 Name: count, dtype: int64
df.Make.value_counts()
Make TESLA 72445 NISSAN 13795 CHEVROLET 12568 FORD 8009 BMW 6842 KIA 6754 TOYOTA 5535 VOLKSWAGEN 4424 VOLVO 3746 JEEP 3690 HYUNDAI 3671 AUDI 3246 RIVIAN 2934 CHRYSLER 2859 MERCEDES-BENZ 1189 PORSCHE 1065 MITSUBISHI 911 HONDA 833 MINI 821 POLESTAR 804 FIAT 795 SUBARU 690 SMART 276 MAZDA 269 LINCOLN 236 JAGUAR 220 LUCID 208 LEXUS 201 CADILLAC 197 GENESIS 130 LAND ROVER 48 ALFA ROMEO 19 FISKER 17 AZURE DYNAMICS 8 TH!NK 5 WHEEGO ELECTRIC CARS 3 DODGE 2 BENTLEY 2 Name: count, dtype: int64
df.Model.value_counts()
Model
MODEL Y 30841
MODEL 3 28589
LEAF 13311
MODEL S 7650
BOLT EV 6112
...
FLYING SPUR 1
918 1
OCEAN 1
S-10 PICKUP 1
BENTAYGA 1
Name: count, Length: 130, dtype: int64
df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts()
Clean Alternative Fuel Vehicle (CAFV) Eligibility Eligibility unknown as battery range has not been researched 77195 Clean Alternative Fuel Vehicle Eligible 63824 Not eligible due to low battery range 18448 Name: count, dtype: int64
df['Electric Vehicle Type'].value_counts()
Electric Vehicle Type Battery Electric Vehicle (BEV) 124153 Plug-in Hybrid Electric Vehicle (PHEV) 35314 Name: count, dtype: int64
df['Electric Range'].value_counts()
Electric Range
0 77195
215 6359
220 4063
84 3965
25 3752
...
95 3
74 3
11 2
59 1
57 1
Name: count, Length: 102, dtype: int64
#writing a function to count
def func(a):
top_types = df[a].value_counts().nlargest(29).index
df[a] = df[a].where(df[a].isin(top_types), 'Other')
counts = df[a].value_counts()
return counts
#calling a function to count county
countCounty=func('County')
import matplotlib.pyplot as plt
plt.figure(figsize=(20,8))
plt.bar(countCounty.index, countCounty.values)
plt.xlabel('County')
plt.ylabel('Count')
plt.title('Top 29 Types vs Other')
plt.xticks(rotation=45)
plt.show()
df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts().plot.pie()
<Axes: ylabel='count'>
countModel=func('Model')
plt.figure(figsize=(20,8))
plt.bar(countModel.index, countModel.values)
plt.xlabel('Model')
plt.ylabel('Models')
plt.title('Top 29 Types vs Other City')
plt.xticks(rotation=45)
plt.show()
countMake=func('Make')
plt.figure(figsize=(20,8))
plt.bar(countMake.index, countMake.values)
plt.xlabel('Make')
plt.ylabel('Count')
plt.title('Top 29 Makes vs Other Makes')
plt.xticks(rotation=45)
plt.show()
#number of duplicated rows
df.duplicated().sum()
0
#missing values
df.isnull().sum()
VIN (1-10) 0 County 4 City 4 State 0 Postal Code 4 Model Year 0 Make 0 Model 0 Electric Vehicle Type 0 Clean Alternative Fuel Vehicle (CAFV) Eligibility 0 Electric Range 0 Base MSRP 0 Legislative District 361 DOL Vehicle ID 0 Vehicle Location 9 Electric Utility 4 2020 Census Tract 4 dtype: int64
#showing the coulmns having the missing values
df.columns[df.isnull().any()]
Index(['County', 'City', 'Postal Code', 'Legislative District',
'Vehicle Location', 'Electric Utility', '2020 Census Tract'],
dtype='object')
import seaborn as sns
plt.figure(figsize=(15,10))
sns.histplot(df['Electric Vehicle Type'])
plt.show()
plt.figure(figsize=(15 , 10))
sns.kdeplot(df['Legislative District'] , fill =True , color="RED")
plt.title("Relation ship between Ownership and Legislative districts")
plt.show()
#Filtering out Washingtion States and calculating count of postal codes
washington_data = df[df['State']=='WA']['Postal Code'].value_counts().reset_index()
washington_data.columns = ['Postal Code','Count']
!pip install folium
Collecting folium Obtaining dependency information for folium from https://files.pythonhosted.org/packages/a2/1a/37c7ee1bc806d6c32621fecc72c19f6a9f9b4369e5e8f406a7c16d49f031/folium-0.15.0-py2.py3-none-any.whl.metadata Downloading folium-0.15.0-py2.py3-none-any.whl.metadata (3.4 kB) Collecting branca>=0.6.0 (from folium) Obtaining dependency information for branca>=0.6.0 from https://files.pythonhosted.org/packages/2f/e7/603b136221de923055716d23e3047da71f92e0d8ba2c4517ce49a54fe768/branca-0.7.0-py3-none-any.whl.metadata Downloading branca-0.7.0-py3-none-any.whl.metadata (1.5 kB) Requirement already satisfied: jinja2>=2.9 in c:\users\saiph\anaconda3\lib\site-packages (from folium) (3.1.2) Requirement already satisfied: numpy in c:\users\saiph\anaconda3\lib\site-packages (from folium) (1.24.3) Requirement already satisfied: requests in c:\users\saiph\anaconda3\lib\site-packages (from folium) (2.31.0) Requirement already satisfied: MarkupSafe>=2.0 in c:\users\saiph\anaconda3\lib\site-packages (from jinja2>=2.9->folium) (2.1.1) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\saiph\anaconda3\lib\site-packages (from requests->folium) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in c:\users\saiph\anaconda3\lib\site-packages (from requests->folium) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\saiph\anaconda3\lib\site-packages (from requests->folium) (1.26.16) Requirement already satisfied: certifi>=2017.4.17 in c:\users\saiph\anaconda3\lib\site-packages (from requests->folium) (2023.7.22) Downloading folium-0.15.0-py2.py3-none-any.whl (100 kB) ---------------------------------------- 0.0/100.3 kB ? eta -:--:-- ---- ----------------------------------- 10.2/100.3 kB ? eta -:--:-- ----------------------- --------------- 61.4/100.3 kB 812.7 kB/s eta 0:00:01 -------------------------------------- 100.3/100.3 kB 958.1 kB/s eta 0:00:00 Downloading branca-0.7.0-py3-none-any.whl (25 kB) Installing collected packages: branca, folium Successfully installed branca-0.7.0 folium-0.15.0
#Choropleth map using Folium
import folium
map = folium.Map(location=[47.5976336,-122.2211712], zoom_start=10)
url = (
"https://raw.githubusercontent.com/OpenDataDE/State-zip-code-GeoJSON/master/wa_washington_zip_codes_geo.min.json"
)
folium.Choropleth(
geo_data= url,
name="choropleth",
data=washington_data,
columns=["Postal Code", "Count"],
key_on="properties.ZCTA5CE10",
fill_color="YlGn",
fill_opacity=0.7,
line_opacity=0.3,
legend_name="EV count",
).add_to(map)
folium.LayerControl().add_to(map)
<folium.map.LayerControl at 0x167c72ed5d0>
map